####################
#Distance to Bosnia#
####################
#This to obtain:
#table_A16.tex

library("broom")
library("dplyr")
library("grid")
library("tidyr")
library("sandwich")
library("lfe")
library("broom")
library("dplyr")
library("tidyr")
library("grid")
library("stargazer")
library("lmtest")
library("multiwayvcov")
library("ggplot2")
library("glue")
library("spdep")
library("spatialreg")
library("stringr")

setwd("/Users/bgpopescu/")
#setwd("C:/Users/bogdanp/")

#################################
#Function for spatial regression#
#################################
spatial_regression <- function(data_sample, y, x, unique_observation_id) {
  clean_x = clean_list(x, "quad")
  clean_x = clean_list(clean_x, "bfe")
  
  specification = as.formula(paste(glue("{y}~"), 
                                   paste(clean_x, collapse = "+")))
  bw<-subset(data_sample, select =c(x, y, unique_observation_id))
  #Removing NAs
  bw<-bw[!sf::st_is_empty(bw), ]
  bw<-na.omit(bw)
  #Step1: the function "poly2nb" builds a neighbours list based on
  list_nb=poly2nb(bw, row.names = dplyr::pull(bw, unique_observation_id))
  #Step2: Convert nb to listw type
  list_nb_w=tryCatch(nb2listw(list_nb, zero.policy=TRUE), error=function(err) NA)
  #Step3: Spatial Filtering
  reg1_res_sp<-tryCatch(SpatialFiltering(specification,
                                         data = bw,
                                         nb = list_nb,
                                         style="W",
                                         ExactEV = F,
                                         zero.policy = T,
                                         na.action=na.exclude),
                        error=function(err) NA)
  
  print("Done Spatial Filtering")
  #Step4: Include fitted results"
  bw_fitted<-tryCatch(cbind(bw, as.data.frame(fitted(reg1_res_sp)),
                            id = row.names(bw)), error=function(err) 
                            {print("NO NEIGHBORS"); data_sample})
  #Keeping track of indices
  #This will allow us to see exactly which observations were used in the regressions
  row.names(bw_fitted) <- bw_fitted$id
  bw_fitted<-subset(bw_fitted, select = -c(id))
  list_fitted_ei<-names(bw_fitted[, grepl("vec", names(bw_fitted))])
  list_fitted_ei<-list_fitted_ei[list_fitted_ei != "Shape"]
  all_vars<-append(x, list_fitted_ei)
  specification_with_eigenv = as.formula(paste(glue("{y}~"), 
                                               paste(all_vars, collapse = "+")))
  
  print(specification_with_eigenv)
  model1 <- tryCatch(lm(specification_with_eigenv, data = bw_fitted, na.action = na.exclude), error=function(err) NA)
  
  reg_moran<-moran.test(x=resid(model1),listw=list_nb_w, zero.policy = T)$statistic
  reg_moran_p<-moran.test(x=resid(model1),listw=list_nb_w, zero.policy = T)$p.value
  reg_moran_star<-paste(round(reg_moran, digits = 3), "",
                        ifelse(reg_moran_p<.01,"***",
                               ifelse(reg_moran_p<.05,"**",
                                      ifelse(reg_moran_p<.1,"*",
                                             ""))), "", "", sep = "")
  
  
  return(list(model1, reg_moran_star))
}


######################################
#Function for cleaning the list of FE#
#####################################

clean_list <- function(old_list, particle) {
  term <- paste(glue("^{particle}"))
  new_list = list() 
  for (i in old_list) {
    #print(i)
    temp_list = c()
    
    for (j in i){
      if (!grepl(term, j)){
        temp_list <- c(temp_list, j)
      }
    }
    l = length(new_list)
    new_list[[l+1]] <- temp_list
  }
  return(new_list)}

#clean_list(specifications, "SN_")


###############################
#Function for rerunning models#
###############################
rerun_regression<-function(regression_name, string_dv, orig_data_for_clusters, cluster_name){
  #Process:
  #-takes already existing regression objects, associates them with an 
  #"official" label, calculates robust clustered SE and CI based on a cluster
  #that is in the original dataset
  #and puts the results in a tibble
  #-this will be used for ggplot later
  
  #Input:
  #-Regression model object after lm
  #-The label for the DV that you want displayed in ggplot
  #-original dataset that contains thhe names of the clusters
  
  #Output:
  #-A tibble with regression coefficients from the model that you ran with:
  #--robust SE
  #--robust CI
  
  #Recreate dataframe from the regression object
  remake_data<-regression_name$model
  #Scale the DV
  remake_data[1]<-scale(remake_data[1])
  #Take the string of the DV and IV
  dv<-regression_name$terms[[2]]
  ivs<-regression_name$terms[[3]]
  ivs_str<-paste(ivs)[2]
  dv_str<-paste(dv, "~", collapse = "")
  dv_ivs_str<-paste(dv_str, ivs_str, sep=" ")
  #Create specification and run model
  specification = as.formula(dv_ivs_str)
  model <- lm(specification, data = remake_data)
  no_obs<-nobs(model)
  no_obs<-paste("Obs: ", as.character(no_obs), sep = "")
  #Calculate robust SE
  model_se<-se_robust_cluster(model, orig_data_for_clusters, cluster_name)
  #Create tibble
  model_tidy<-tidy(model)
  model_tidy$no_obs<-no_obs
  model_se_tidy<-tidy(model_se)
  names(model_se_tidy)[1] <- "term"
  names(model_se_tidy)[2] <- "robust.se"
  model_tidy2<-left_join(model_tidy, model_se_tidy, by = c("term"="term"))
  
  #Calculate CI
  df_ci<-as.data.frame(ci_robust_cluster(model, orig_data_for_clusters, cluster_name))
  df_ci <- cbind(term = rownames(df_ci), df_ci)
  names(df_ci)[2] <- "lb_95"
  names(df_ci)[3] <- "ub_95"
  
  #Merge CI to the dataframe
  model_tidy3<-left_join(model_tidy2, df_ci, by = c("term"="term"))
  #Only keep the treat variable
  model_tidy4<-subset(model_tidy3, term=="treat")
  model_tidy4$dv_official<-string_dv
  return(model_tidy4)
}

se_robust_cluster <- function(x, data_sample_shape, name_cluster){
  data_model<-x$model
  data_model$in_regression<-1
  data_model<-subset(data_model, select=c(in_regression))
  #Create index
  data_model <- tibble::rowid_to_column(data_model, "index")
  data2<-tibble::rowid_to_column(data_sample_shape, "index")
  #Merge by rowname or index
  data_merged <- left_join(data_model, data2, by=c("index" = "index"))
  #Remove any missing
  data_subset<-subset(data_merged, !is.na(in_regression))
  dat_clusters<-data_subset[[name_cluster]]
  res<-coeftest(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))[, 2]
  return(res)}

ci_robust_cluster <- function(x, data_sample_shape, name_cluster){
  data_model<-x$model
  data_model$in_regression<-1
  data_model<-subset(data_model, select=c(in_regression))
  #Create index
  data_model <- tibble::rowid_to_column(data_model, "index")
  data2<-tibble::rowid_to_column(data_sample_shape, "index")
  #Merge by rowname or index
  data_merged <- left_join(data_model, data2, by=c("index" = "index"))
  #Remove any missing
  data_subset<-subset(data_merged, !is.na(in_regression))
  dat_clusters<-data_subset[[name_cluster]]
  res_ci<-coefci(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))
  return(res_ci)}


#Reading Polygons
x<-st_layers(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb")


HRV_adm0 <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                    layer="HRV_adm0_wgs")

krajna <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                  layer="krajna_line_GCS")

HRV_adm3 <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                    layer="HRV_adm3")
HRV_adm3<-st_simplify(HRV_adm3, dTolerance = 0.005)
HRV_adm3<-subset(HRV_adm3, select = c(ID_2))

HRV_adm3_data = read_excel("./Dropbox/Legacies_Project/Analysis/data/merge.xlsx", sheet=1, col_names = TRUE, skip = 0)

final_sp<-left_join(HRV_adm3, HRV_adm3_data, by = c("ID_2"="ID_2"))
final_sp$bosnia_border_mun
data<-subset(final_sp, bosnia_border_mun==0)


specifications <- list(c('treat', 
                         'POINT_X', 'POINT_Y', 
                         'zagreb_distance', 
                         'bosnia_distance',
                         'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7',
                         'quad1', 'quad2', 'quad3'),
                       c('treat',
                         'treat_bosnia_distance',
                         'bosnia_distance',
                         'POINT_X', 'POINT_Y', 
                         'zagreb_distance', 
                         'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7',
                         'quad1', 'quad2', 'quad3'))


###############################
#Effect of Proximity to Bosnia#
###############################

pct_no_water_bosn_list<-spatial_regression(data, "pct_no_water", specifications[[1]], "ID_2")
pct_no_water_bosn<-pct_no_water_bosn_list[[1]]
pct_no_water_bosn_m<-pct_no_water_bosn_list[[2]]

pct_no_water_bosn_int_list<-spatial_regression(data, "pct_no_water", specifications[[2]], "ID_2")
pct_no_water_bosn_int<-pct_no_water_bosn_int_list[[1]]
pct_no_water_bosn_int_m<-pct_no_water_bosn_int_list[[2]]

pct_no_sewer_bosn_list<-spatial_regression(data, "pct_no_sewer", specifications[[1]], "ID_2")
pct_no_sewer_bosn<-pct_no_sewer_bosn_list[[1]]
pct_no_sewer_bosn_m<-pct_no_sewer_bosn_list[[2]]

pct_no_sewer_bosn_int_list<-spatial_regression(data, "pct_no_sewer", specifications[[2]], "ID_2")
pct_no_sewer_bosn_int<-pct_no_sewer_bosn_int_list[[1]]
pct_no_sewer_bosn_int_m<-pct_no_sewer_bosn_int_list[[2]]

pov_rate_income_bosn_list<-spatial_regression(data, "pov_rate_income", specifications[[1]], "ID_2")
pov_rate_income_bosn<-pov_rate_income_bosn_list[[1]]
pov_rate_income_bosn_m<-pov_rate_income_bosn_list[[2]]

pov_rate_income_bosn_int_list<-spatial_regression(data, "pov_rate_income", specifications[[2]], "ID_2")
pov_rate_income_bosn_int<-pov_rate_income_bosn_int_list[[1]]
pov_rate_income_bosn_int_m<-pov_rate_income_bosn_int_list[[2]]

share_lesseduc_25_list<-spatial_regression(data, "share_lesseduc_25", specifications[[1]], "ID_2")
share_lesseduc_25_bosn<-share_lesseduc_25_list[[1]]
share_lesseduc_25_bosn_m<-share_lesseduc_25_list[[2]]

share_lesseduc_25_int_list<-spatial_regression(data, "share_lesseduc_25", specifications[[2]], "ID_2")
share_lesseduc_25_bosn_int<-share_lesseduc_25_int_list[[1]]
share_lesseduc_25_bosn_int_m<-share_lesseduc_25_int_list[[2]]

######################
#Step1: RELEVANT VARS#
######################

relevant_vars<-c("pct_no_water", 
                 "pov_rate_income",
                 "share_lesseduc_25")

#####################
#Step2: MEANS AND SD#
#####################

means<-list()
sds<-list()
for (i in relevant_vars){
  item_mean<-round(mean(data[[i]], na.rm=T), 3)
  item_sd<-round(sd(data[[i]], na.rm=T), 3)
  means[[length(means) + 1]] <- item_mean 
  sds[[length(sds) + 1]] <- item_sd 
}
means<-unlist(means)
sds<-unlist(sds)

####################
#Step3: LIST MODELS#
####################

list_models<-list(pct_no_water_bosn_int, 
                  pov_rate_income_bosn_int,
                  share_lesseduc_25_bosn_int)

stargazer(pct_no_water_bosn_int, 
          pov_rate_income_bosn_int,
          share_lesseduc_25_bosn_int,
          keep = c("treat", "bosnia_distance", "treat_bosnia_distance"))

###########################
#Step4: MORAN I RESID LIST#
###########################

list_moran_i<-c(pct_no_water_bosn_int_m, 
                pov_rate_income_bosn_int_m,
                share_lesseduc_25_bosn_int_m)

######################
#Step5: Column labels#
######################

column_labels<-c("\\shortstack{Pct. Dwellings\\\\ No Water\\\\ 2011}",
                 "\\shortstack{Pct. People at\\\\ Risk of Poverty\\\\ 2011}",
                 "\\shortstack{Pct. Less\\\\ Educated People\\\\ 2011}")

bosn_tour=stargazer(list_models,
                    column.labels= column_labels,
                    keep = c("treat", "bosnia_distance", "treat_bosnia_distance"),
                    se = list(se_robust_cluster(pct_no_water_bosn_int, data, "NAME_1"),
                              se_robust_cluster(pov_rate_income_bosn_int, data, "NAME_1"),
                              se_robust_cluster(share_lesseduc_25_bosn_int, data, "NAME_1")),
                    covariate.labels=c("Military Territory", 
                                       "Military Territory x Distance Bosnia in km",
                                       "Distance Bosnia in km"),
                    dep.var.caption = "Dependent variable",
                    dep.var.labels.include = F,
                    omit.stat=c("LL","ser","f", "rsq"), no.space=TRUE, float=FALSE,
                    add.lines=list(c("Mean", means),
                                   c("SD", sds),
                                   c("Boundary FE", rep("Yes", 5)),
                                   c("Region FE", rep("Yes", 5)),
                                   c("Moran eigenvectors", rep("Yes", 10)),
                                   c("Moran's I Residual", list_moran_i)))
note_latex <- "\\multicolumn{4}{l} {\\parbox[t]{17cm}{ \\footnotesize \\textit{Notes:} 
Coefficients and County clustered robust standard errors in parantheses from OLS regression. 
$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01.
The unit of analysis is municipality (opcina). 
See codebook in the appendix for data sources and how the variables were calculated.}}"
bosn_tour[grepl("Note",bosn_tour)] <- note_latex
#cat(bosn_tour, file="bosn_tour.tex", sep="\n")
cat(bosn_tour, file="./Dropbox/Legacies_Project/Paper/tables/table_A16.tex", sep="\n")
